#include "device.h"
#include "instance.hpp"
#include "util/logs.hpp"
#include "Emu/system_config.h"

#ifdef __APPLE__
#include <MoltenVK/vk_mvk_moltenvk.h>
#endif

namespace vk
{
	// Global shared render device.
	// Non-owning pointer that starts out null; render_device::destroy() resets it to
	// null when the device being destroyed is the one registered here.
	const render_device* g_render_device = nullptr;

	void physical_device::get_physical_device_features(bool allow_extensions)
	{
		if (!allow_extensions)
		{
			vkGetPhysicalDeviceFeatures(dev, &features);
			return;
		}

		supported_extensions instance_extensions(supported_extensions::instance);
		supported_extensions device_extensions(supported_extensions::device, nullptr, dev);

		if (!instance_extensions.is_supported(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME))
		{
			vkGetPhysicalDeviceFeatures(dev, &features);
		}
		else
		{
			VkPhysicalDeviceFeatures2KHR features2;
			features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
			features2.pNext = nullptr;

			VkPhysicalDeviceFloat16Int8FeaturesKHR shader_support_info{};
			VkPhysicalDeviceDescriptorIndexingFeatures descriptor_indexing_info{};
			VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT fbo_loops_info{};
			VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR shader_barycentric_info{};

			if (device_extensions.is_supported(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME))
			{
				shader_support_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
				features2.pNext           = &shader_support_info;
			}

			if (device_extensions.is_supported(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME))
			{
				driver_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR;
				driver_properties.pNext = features2.pNext;
				features2.pNext         = &driver_properties;
			}

			if (device_extensions.is_supported(VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME))
			{
				descriptor_indexing_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT;
				descriptor_indexing_info.pNext = features2.pNext;
				features2.pNext                = &descriptor_indexing_info;
				descriptor_indexing_support    = true;
			}

			if (device_extensions.is_supported(VK_EXT_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_EXTENSION_NAME))
			{
				fbo_loops_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT;
				fbo_loops_info.pNext = features2.pNext;
				features2.pNext      = &fbo_loops_info;
			}

			if (device_extensions.is_supported(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME))
			{
				shader_barycentric_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_FEATURES_KHR;
				shader_barycentric_info.pNext = features2.pNext;
				features2.pNext               = &shader_barycentric_info;
			}

			auto _vkGetPhysicalDeviceFeatures2KHR = reinterpret_cast<PFN_vkGetPhysicalDeviceFeatures2KHR>(vkGetInstanceProcAddr(parent, "vkGetPhysicalDeviceFeatures2KHR"));
			ensure(_vkGetPhysicalDeviceFeatures2KHR); // "vkGetInstanceProcAddress failed to find entry point!"
			_vkGetPhysicalDeviceFeatures2KHR(dev, &features2);

			shader_types_support.allow_float64 = !!features2.features.shaderFloat64;
			shader_types_support.allow_float16 = !!shader_support_info.shaderFloat16;
			shader_types_support.allow_int8    = !!shader_support_info.shaderInt8;
			framebuffer_loops_support          = !!fbo_loops_info.attachmentFeedbackLoopLayout;
			barycoords_support                 = !!shader_barycentric_info.fragmentShaderBarycentric;
			features                           = features2.features;

			if (descriptor_indexing_support)
			{
#define SET_DESCRIPTOR_BITFLAG(field, bit) if (descriptor_indexing_info.field) descriptor_update_after_bind_mask |= (1ull << bit)
				SET_DESCRIPTOR_BITFLAG(descriptorBindingUniformBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
				SET_DESCRIPTOR_BITFLAG(descriptorBindingSampledImageUpdateAfterBind, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
				SET_DESCRIPTOR_BITFLAG(descriptorBindingSampledImageUpdateAfterBind, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
				SET_DESCRIPTOR_BITFLAG(descriptorBindingStorageImageUpdateAfterBind, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
				SET_DESCRIPTOR_BITFLAG(descriptorBindingStorageBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
				SET_DESCRIPTOR_BITFLAG(descriptorBindingUniformTexelBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
				SET_DESCRIPTOR_BITFLAG(descriptorBindingStorageTexelBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
#undef SET_DESCRIPTOR_BITFLAG
			}
		}

		stencil_export_support           = device_extensions.is_supported(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
		conditional_render_support       = device_extensions.is_supported(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
		external_memory_host_support     = device_extensions.is_supported(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
		sampler_mirror_clamped_support   = device_extensions.is_supported(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME);
		unrestricted_depth_range_support = device_extensions.is_supported(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
		debug_utils_support              = instance_extensions.is_supported(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
		surface_capabilities_2_support   = instance_extensions.is_supported(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
	}

	void physical_device::get_physical_device_properties(bool allow_extensions)
	{
		vkGetPhysicalDeviceMemoryProperties(dev, &memory_properties);

		if (!allow_extensions)
		{
			vkGetPhysicalDeviceProperties(dev, &props);
			return;
		}

		supported_extensions instance_extensions(supported_extensions::instance);
		if (!instance_extensions.is_supported(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME))
		{
			vkGetPhysicalDeviceProperties(dev, &props);
		}
		else
		{
			VkPhysicalDeviceProperties2KHR properties2;
			properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
			properties2.pNext = nullptr;

			VkPhysicalDeviceDescriptorIndexingPropertiesEXT descriptor_indexing_props{};

			if (descriptor_indexing_support)
			{
				descriptor_indexing_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT;
				descriptor_indexing_props.pNext = properties2.pNext;
				properties2.pNext = &descriptor_indexing_props;
			}

			auto _vkGetPhysicalDeviceProperties2KHR = reinterpret_cast<PFN_vkGetPhysicalDeviceProperties2KHR>(vkGetInstanceProcAddr(parent, "vkGetPhysicalDeviceProperties2KHR"));
			ensure(_vkGetPhysicalDeviceProperties2KHR);

			_vkGetPhysicalDeviceProperties2KHR(dev, &properties2);
			props = properties2.properties;

#ifdef __APPLE__
		if (instance_extensions.is_supported(VK_MVK_MOLTENVK_EXTENSION_NAME))
		{
			MVKConfiguration mvk_config = {};
			size_t mvk_config_size = sizeof(MVKConfiguration);

			PFN_vkGetMoltenVKConfigurationMVK _vkGetMoltenVKConfigurationMVK = nullptr;
			_vkGetMoltenVKConfigurationMVK = reinterpret_cast<PFN_vkGetMoltenVKConfigurationMVK>(vkGetInstanceProcAddr(parent, "vkGetMoltenVKConfigurationMVK"));
			ensure(_vkGetMoltenVKConfigurationMVK);

			PFN_vkSetMoltenVKConfigurationMVK _vkSetMoltenVKConfigurationMVK = nullptr;
			_vkSetMoltenVKConfigurationMVK = reinterpret_cast<PFN_vkSetMoltenVKConfigurationMVK>(vkGetInstanceProcAddr(parent, "vkSetMoltenVKConfigurationMVK"));
			ensure(_vkSetMoltenVKConfigurationMVK);

			CHECK_RESULT_EX(_vkGetMoltenVKConfigurationMVK(VK_NULL_HANDLE, &mvk_config, &mvk_config_size), std::string("Could not get MoltenVK configuration."));

			mvk_config.resumeLostDevice = true;
			mvk_config.fastMathEnabled = g_cfg.video.disable_msl_fast_math.get() ? MVK_CONFIG_FAST_MATH_NEVER : MVK_CONFIG_FAST_MATH_ON_DEMAND;

			CHECK_RESULT_EX(_vkSetMoltenVKConfigurationMVK(VK_NULL_HANDLE, &mvk_config, &mvk_config_size), std::string("Could not set MoltenVK configuration."));
		}
		else
		{
			rsx_log.error("Cannot set the MoltenVK configuration because VK_MVK_moltenvk is not supported.\nIf you're using MoltenVK through libvulkan, please manually set the appropriate environment variables instead.");
		}
#endif

			if (descriptor_indexing_support)
			{
				if (descriptor_indexing_props.maxUpdateAfterBindDescriptorsInAllPools < 800'000)
				{
					rsx_log.error("Physical device does not support enough descriptors for deferred updates to work effectively. Deferred updates are disabled.");
					descriptor_update_after_bind_mask = 0;
				}
				else if (descriptor_indexing_props.maxUpdateAfterBindDescriptorsInAllPools < 2'000'000)
				{
					rsx_log.warning("Physical device reports a low amount of allowed deferred descriptor updates. Draw call threshold will be lowered accordingly.");
					descriptor_max_draw_calls = 8192;
				}
			}
		}
	}

	// Binds this object to a physical device of the given instance, queries its features and
	// properties, and logs diagnostics for driver versions with known problems.
	//
	// context:          owning Vulkan instance
	// pdev:             physical device handle to wrap
	// allow_extensions: forwarded to the feature/property queries
	void physical_device::create(VkInstance context, VkPhysicalDevice pdev, bool allow_extensions)
	{
		parent = context;
		dev    = pdev;

		// Features must be queried first, the property query reads descriptor_indexing_support
		get_physical_device_features(allow_extensions);
		get_physical_device_properties(allow_extensions);

		rsx_log.always()("Found vulkan-compatible GPU: '%s' running on driver %s", get_name(), get_driver_version());

		const auto vendor = get_driver_vendor();
		if (vendor == driver_vendor::RADV)
		{
			if (get_name().find("LLVM 8.0.0") != umax)
			{
				// Serious driver bug causing black screens
				// See https://bugs.freedesktop.org/show_bug.cgi?id=110970
				rsx_log.fatal("RADV drivers have a major driver bug with LLVM 8.0.0 resulting in no visual output. Upgrade to LLVM version 8.0.1 or greater to avoid this issue.");
			}
		}
		else if (vendor == driver_vendor::NVIDIA)
		{
			// NVIDIA packs its version as major:10, minor:8, patch:8, revision:6
#ifdef _WIN32
			// SPIRV bugs were fixed in 452.28 for windows
			const u32 minimum_major = 452, minimum_minor = 28;
#else
			// SPIRV bugs were fixed in 450.56 for linux/BSD
			const u32 minimum_major = 450, minimum_minor = 56;
#endif
			const u32 minimum_version = (minimum_major << 22) | (minimum_minor << 14);

			// Only major.minor take part in the comparison; drop patch and revision bits
			const auto installed_version = props.driverVersion & ~0x3fffu;
			if (installed_version < minimum_version)
			{
				rsx_log.error("Your current NVIDIA graphics driver version %s has known issues and is unsupported. Update to the latest NVIDIA driver.", get_driver_version());
			}
		}

		if (get_chip_class() == chip_class::AMD_vega)
		{
			// Disable fp16 if driver uses LLVM emitter. It does fine with AMD proprietary drivers though.
			const bool is_proprietary_driver = (driver_properties.driverID == VK_DRIVER_ID_AMD_PROPRIETARY_KHR);
			shader_types_support.allow_float16 = is_proprietary_driver;
		}
	}

	// Returns a copy of the device name reported in the cached device properties.
	std::string physical_device::get_name() const
	{
		return std::string(props.deviceName);
	}

	// Identifies the driver vendor. The driver ID from driver_properties is used when it is
	// non-zero; otherwise the vendor is guessed from substrings of the device name.
	driver_vendor physical_device::get_driver_vendor() const
	{
#ifdef __APPLE__
		// moltenVK currently returns DRIVER_ID_MOLTENVK (0).
		// For now, assume the vendor is moltenVK on Apple devices.
		return driver_vendor::MVK;
#endif

		if (driver_properties.driverID)
		{
			// The driver identified itself, trust it
			switch (driver_properties.driverID)
			{
			case VK_DRIVER_ID_AMD_PROPRIETARY_KHR:
			case VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR:
				return driver_vendor::AMD;
			case VK_DRIVER_ID_MESA_RADV_KHR:
				return driver_vendor::RADV;
			case VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR:
				return driver_vendor::NVIDIA;
			case VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR:
				return driver_vendor::INTEL;
			case VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA_KHR:
				return driver_vendor::ANV;
			default:
				// Mobile?
				return driver_vendor::unknown;
			}
		}

		// No driver ID available. Fall back to matching well-known tokens in the device name.
		// The order of the checks matters: the first match wins.
		const auto device_name = get_name();
		const auto name_has = [&device_name](const char* token) -> bool
		{
			return device_name.find(token) != umax;
		};

		if (name_has("Radeon"))
		{
			return driver_vendor::AMD;
		}

		if (name_has("NVIDIA") || name_has("GeForce") || name_has("Quadro"))
		{
			return driver_vendor::NVIDIA;
		}

		if (name_has("RADV"))
		{
			return driver_vendor::RADV;
		}

		if (name_has("Intel"))
		{
#ifdef _WIN32
			return driver_vendor::INTEL;
#else
			return driver_vendor::ANV;
#endif
		}

		return driver_vendor::unknown;
	}

	// Formats props.driverVersion as a human readable string.
	// NVIDIA drivers use their own 4-field packing; every other vendor is decoded
	// with the standard Vulkan version layout.
	std::string physical_device::get_driver_version() const
	{
		switch (get_driver_vendor())
		{
		case driver_vendor::NVIDIA:
		{
			// 10 + 8 + 8 + 6
			const auto major_version = props.driverVersion >> 22;
			const auto minor_version = (props.driverVersion >> 14) & 0xff;
			const auto patch         = (props.driverVersion >> 6) & 0xff;
			const auto revision      = (props.driverVersion & 0x3f);

			return fmt::format("%u.%u.%u.%u", major_version, minor_version, patch, revision);
		}
		default:
		{
			// 10 + 10 + 12 (standard vulkan encoding created with VK_MAKE_VERSION)
			const auto major_version = props.driverVersion >> 22;
			const auto minor_version = (props.driverVersion >> 12) & 0x3ff;
			// The patch field is 12 bits wide; a 10-bit mask would truncate patch numbers >= 1024
			const auto patch         = props.driverVersion & 0xfff;

			return fmt::format("%u.%u.%u", major_version, minor_version, patch);
		}
		}
	}

	// Maps the vendor and device IDs from the cached properties to a chip class
	// via get_chip_family().
	chip_class physical_device::get_chip_class() const
	{
		const auto vendor_id = props.vendorID;
		const auto device_id = props.deviceID;
		return get_chip_family(vendor_id, device_id);
	}

	// Returns the number of queue families. Uses the cached queue_props when it has been
	// populated, otherwise asks the driver for the count without caching anything.
	u32 physical_device::get_queue_count() const
	{
		if (queue_props.empty())
		{
			// Passing a null array makes the driver report only the number of families
			u32 family_count = 0;
			vkGetPhysicalDeviceQueueFamilyProperties(dev, &family_count, nullptr);
			return family_count;
		}

		return ::size32(queue_props);
	}

	// Returns the properties of queue family `queue`, filling the queue_props cache on first use.
	// Throws through fmt::throw_exception when `queue` is out of range.
	const VkQueueFamilyProperties& physical_device::get_queue_properties(u32 queue)
	{
		if (queue_props.empty())
		{
			// Two-step query: fetch the count, then the actual entries
			u32 family_count = 0;
			vkGetPhysicalDeviceQueueFamilyProperties(dev, &family_count, nullptr);

			queue_props.resize(family_count);
			vkGetPhysicalDeviceQueueFamilyProperties(dev, &family_count, queue_props.data());
		}

		const bool out_of_range = (queue >= queue_props.size());
		if (out_of_range)
		{
			fmt::throw_exception("Bad queue index passed to get_queue_properties (%u)", queue);
		}

		return queue_props[queue];
	}

	// Returns the memory properties cached in `memory_properties`
	// (filled by get_physical_device_properties()).
	const VkPhysicalDeviceMemoryProperties& physical_device::get_memory_properties() const
	{
		return memory_properties;
	}

	// Returns the `limits` member of the cached device properties
	// (props is filled by get_physical_device_properties()).
	const VkPhysicalDeviceLimits& physical_device::get_limits() const
	{
		return props.limits;
	}

	// Implicit conversion to the wrapped physical device handle assigned in create().
	physical_device::operator VkPhysicalDevice() const
	{
		return dev;
	}

	// Implicit conversion to the parent instance handle assigned in create().
	physical_device::operator VkInstance() const
	{
		return parent;
	}

	// Render Device - The actual usable device
	// Creates the logical device on top of `pdev`, fetches its queues, imports optional
	// extension entry points and sets up the memory allocator.
	//
	// pdev:               physical device to create the logical device from (stored in pgpu)
	// graphics_queue_idx: queue family used for graphics
	// present_queue_idx:  queue family used for presentation; must equal graphics_queue_idx or be umax
	// transfer_queue_idx: queue family used for transfers; umax falls back to the graphics family,
	//                     using a second queue from that family when one is available
	void render_device::create(vk::physical_device& pdev, u32 graphics_queue_idx, u32 present_queue_idx, u32 transfer_queue_idx)
	{
		std::string message_on_error;

		// pQueuePriorities must point to queueCount entries. The graphics family can request
		// up to 2 queues (see the spare transfer queue below), so 2 entries are required.
		float queue_priorities[2] = { 0.f, 0.f };
		pgpu = &pdev;

		ensure(graphics_queue_idx == present_queue_idx || present_queue_idx == umax); // TODO
		std::vector<VkDeviceQueueCreateInfo> device_queues;
		// At most 2 entries are created; reserving keeps the graphics_queue reference valid across the second emplace_back
		device_queues.reserve(2);

		auto& graphics_queue = device_queues.emplace_back();
		graphics_queue.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
		graphics_queue.pNext = nullptr;
		graphics_queue.flags = 0;
		graphics_queue.queueFamilyIndex = graphics_queue_idx;
		graphics_queue.queueCount = 1;
		graphics_queue.pQueuePriorities = queue_priorities;

		u32 transfer_queue_sub_index = 0;
		if (transfer_queue_idx == umax)
		{
			// Transfer queue must be a valid device queue
			rsx_log.warning("Dedicated transfer+compute queue was not found on this GPU. Will use graphics queue instead.");
			transfer_queue_idx = graphics_queue_idx;

			// Check if we can at least get a second graphics queue
			if (pdev.get_queue_properties(graphics_queue_idx).queueCount > 1)
			{
				rsx_log.notice("Will use a spare graphics queue to push transfer operations.");
				graphics_queue.queueCount++;
				transfer_queue_sub_index = 1;
			}
		}

		m_graphics_queue_family = graphics_queue_idx;
		m_present_queue_family = present_queue_idx;
		m_transfer_queue_family = transfer_queue_idx;

		if (graphics_queue_idx != transfer_queue_idx)
		{
			auto& transfer_queue = device_queues.emplace_back();
			transfer_queue.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
			transfer_queue.pNext = nullptr;
			transfer_queue.flags = 0;
			transfer_queue.queueFamilyIndex = transfer_queue_idx;
			transfer_queue.queueCount = 1;
			transfer_queue.pQueuePriorities = queue_priorities;
		}

		// Set up instance information
		std::vector<const char*> requested_extensions = { VK_KHR_SWAPCHAIN_EXTENSION_NAME };

		// Enable hardware features manually
		// Currently we require:
		// 1. Anisotropic sampling
		// 2. DXT support
		// 3. Indexable storage buffers
		VkPhysicalDeviceFeatures enabled_features{};
		if (pgpu->shader_types_support.allow_float16)
		{
			requested_extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
		}

		if (pgpu->conditional_render_support)
		{
			requested_extensions.push_back(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
		}

		if (pgpu->unrestricted_depth_range_support)
		{
			requested_extensions.push_back(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
		}

		if (pgpu->external_memory_host_support)
		{
			requested_extensions.push_back(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME);
			requested_extensions.push_back(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
		}

		if (pgpu->stencil_export_support)
		{
			requested_extensions.push_back(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
		}

		if (pgpu->sampler_mirror_clamped_support)
		{
			requested_extensions.push_back(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME);
		}

		if (pgpu->descriptor_indexing_support)
		{
			requested_extensions.push_back(VK_KHR_MAINTENANCE3_EXTENSION_NAME);
			requested_extensions.push_back(VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME);
		}

		if (pgpu->framebuffer_loops_support)
		{
			requested_extensions.push_back(VK_EXT_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_EXTENSION_NAME);
		}

		if (pgpu->barycoords_support)
		{
			requested_extensions.push_back(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME);
		}

		// Start from the wanted feature set; unsupported entries are switched off further down
		enabled_features.robustBufferAccess = VK_TRUE;
		enabled_features.fullDrawIndexUint32 = VK_TRUE;
		enabled_features.independentBlend = VK_TRUE;
		enabled_features.logicOp = VK_TRUE;
		enabled_features.depthClamp = VK_TRUE;
		enabled_features.depthBounds = VK_TRUE;
		enabled_features.wideLines = VK_TRUE;
		enabled_features.largePoints = VK_TRUE;
		enabled_features.shaderFloat64 = VK_TRUE;

		if (g_cfg.video.antialiasing_level != msaa_level::none)
		{
			// MSAA features
			enabled_features.sampleRateShading = VK_TRUE;
			enabled_features.alphaToOne = VK_TRUE;
			enabled_features.shaderStorageImageMultisample = VK_TRUE;
			// enabled_features.shaderStorageImageReadWithoutFormat = VK_TRUE;  // Unused currently, may be needed soon
			enabled_features.shaderStorageImageWriteWithoutFormat = VK_TRUE;
		}

		if (g_cfg.video.precise_zpass_count)
		{
			enabled_features.occlusionQueryPrecise = VK_TRUE;
		}

		// enabled_features.shaderSampledImageArrayDynamicIndexing = TRUE;  // Unused currently but will be needed soon
		enabled_features.shaderClipDistance = VK_TRUE;
		// enabled_features.shaderCullDistance = VK_TRUE;  // Alt notation of clip distance

		enabled_features.samplerAnisotropy = VK_TRUE;
		enabled_features.textureCompressionBC = VK_TRUE;
		enabled_features.shaderStorageBufferArrayDynamicIndexing = VK_TRUE;

		// Optionally disable unsupported stuff
		if (!pgpu->features.shaderStorageImageMultisample || !pgpu->features.shaderStorageImageWriteWithoutFormat)
		{
			// Disable MSAA if any of these two features are unsupported
			if (g_cfg.video.antialiasing_level != msaa_level::none)
			{
				rsx_log.error("Your GPU driver does not support some required MSAA features. MSAA will be disabled.");
				g_cfg.video.antialiasing_level.set(msaa_level::none);
			}

			enabled_features.sampleRateShading = VK_FALSE;
			enabled_features.alphaToOne = VK_FALSE;
			enabled_features.shaderStorageImageMultisample = VK_FALSE;
			enabled_features.shaderStorageImageWriteWithoutFormat = VK_FALSE;
		}

		if (!pgpu->features.shaderClipDistance)
		{
			rsx_log.error("Your GPU does not support shader clip distance. Graphics will not render correctly.");
			enabled_features.shaderClipDistance = VK_FALSE;
		}

		if (!pgpu->features.shaderStorageBufferArrayDynamicIndexing)
		{
			rsx_log.error("Your GPU does not support shader storage buffer array dynamic indexing. Graphics will not render correctly.");
			enabled_features.shaderStorageBufferArrayDynamicIndexing = VK_FALSE;
		}

		if (!pgpu->features.samplerAnisotropy)
		{
			rsx_log.error("Your GPU does not support anisotropic filtering. Graphics may not render correctly.");
			enabled_features.samplerAnisotropy = VK_FALSE;
		}

		if (!pgpu->features.shaderFloat64)
		{
			rsx_log.error("Your GPU does not support double precision floats in shaders. Graphics may not render correctly.");
			enabled_features.shaderFloat64 = VK_FALSE;
		}

		if (!pgpu->features.depthBounds)
		{
			rsx_log.error("Your GPU does not support depth bounds testing. Graphics may not render correctly.");
			enabled_features.depthBounds = VK_FALSE;
		}

		if (!pgpu->features.wideLines)
		{
			rsx_log.error("Your GPU does not support wide lines. Graphics may not render correctly.");
			enabled_features.wideLines = VK_FALSE;
		}

		if (!pgpu->features.sampleRateShading && enabled_features.sampleRateShading)
		{
			rsx_log.error("Your GPU does not support sample rate shading for multisampling. Graphics may be inaccurate when MSAA is enabled.");
			enabled_features.sampleRateShading = VK_FALSE;
		}

		if (!pgpu->features.alphaToOne && enabled_features.alphaToOne)
		{
			// AMD proprietary drivers do not expose alphaToOne support
			rsx_log.error("Your GPU does not support alpha-to-one for multisampling. Graphics may be inaccurate when MSAA is enabled.");
			enabled_features.alphaToOne = VK_FALSE;
		}

		if (!pgpu->features.occlusionQueryPrecise && enabled_features.occlusionQueryPrecise)
		{
			rsx_log.error("Your GPU does not support precise occlusion queries. Graphics may not render correctly.");
			enabled_features.occlusionQueryPrecise = VK_FALSE;
		}

#ifdef __APPLE__
		if (!pgpu->features.logicOp)
		{
			rsx_log.error("Your GPU does not support framebuffer logical operations. Graphics may not render correctly.");
			enabled_features.logicOp = VK_FALSE;
		}
#endif

		if (pgpu->get_driver_vendor() == driver_vendor::ANV &&
			(pgpu->descriptor_update_after_bind_mask & (1ull << VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)))
		{
			// Just disable robust access for now. I'll revisit after ARC launches.
			rsx_log.error("Robust buffer access is broken when enabled with EXT_descriptor_indexing on ANV");
			enabled_features.robustBufferAccess = VK_FALSE;
		}

		VkDeviceCreateInfo device = {};
		device.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
		device.pNext = nullptr;
		device.queueCreateInfoCount = ::size32(device_queues);
		device.pQueueCreateInfos = device_queues.data();
		device.enabledLayerCount = 0;
		device.ppEnabledLayerNames = nullptr; // Deprecated
		device.enabledExtensionCount = ::size32(requested_extensions);
		device.ppEnabledExtensionNames = requested_extensions.data();
		device.pEnabledFeatures = &enabled_features;

		// Optional feature structures are pushed to the front of device.pNext.
		// They are declared at function scope so they stay alive until vkCreateDevice is called.
		VkPhysicalDeviceFloat16Int8FeaturesKHR shader_support_info{};
		if (pgpu->shader_types_support.allow_float16)
		{
			// Allow use of f16 type in shaders if possible
			shader_support_info.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
			shader_support_info.shaderFloat16 = VK_TRUE;
			shader_support_info.pNext = const_cast<void*>(device.pNext);
			device.pNext = &shader_support_info;

			rsx_log.notice("GPU/driver supports float16 data types natively. Using native float16_t variables if possible.");
		}
		else
		{
			rsx_log.notice("GPU/driver lacks support for float16 data types. All float16_t arithmetic will be emulated with float32_t.");
		}

		VkPhysicalDeviceDescriptorIndexingFeatures indexing_features{};
		if (pgpu->descriptor_indexing_support)
		{
			// Only enable update-after-bind for the descriptor types recorded in the physical device's mask
#define SET_DESCRIPTOR_BITFLAG(field, bit) if (pgpu->descriptor_update_after_bind_mask & (1ull << bit)) indexing_features.field = VK_TRUE
			SET_DESCRIPTOR_BITFLAG(descriptorBindingUniformBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
			SET_DESCRIPTOR_BITFLAG(descriptorBindingSampledImageUpdateAfterBind, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
			SET_DESCRIPTOR_BITFLAG(descriptorBindingSampledImageUpdateAfterBind, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
			SET_DESCRIPTOR_BITFLAG(descriptorBindingStorageImageUpdateAfterBind, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
			SET_DESCRIPTOR_BITFLAG(descriptorBindingStorageBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
			SET_DESCRIPTOR_BITFLAG(descriptorBindingUniformTexelBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
			SET_DESCRIPTOR_BITFLAG(descriptorBindingStorageTexelBufferUpdateAfterBind, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
#undef SET_DESCRIPTOR_BITFLAG

			indexing_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT;
			indexing_features.pNext = const_cast<void*>(device.pNext);
			device.pNext = &indexing_features;
		}

		VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT fbo_loop_features{};
		if (pgpu->framebuffer_loops_support)
		{
			fbo_loop_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT;
			fbo_loop_features.attachmentFeedbackLoopLayout = VK_TRUE;
			fbo_loop_features.pNext = const_cast<void*>(device.pNext);
			device.pNext = &fbo_loop_features;
		}

		CHECK_RESULT_EX(vkCreateDevice(*pgpu, &device, nullptr, &dev), message_on_error);

		// Initialize queues
		vkGetDeviceQueue(dev, graphics_queue_idx, 0, &m_graphics_queue);
		vkGetDeviceQueue(dev, transfer_queue_idx, transfer_queue_sub_index, &m_transfer_queue);

		if (present_queue_idx != umax)
		{
			vkGetDeviceQueue(dev, present_queue_idx, 0, &m_present_queue);
		}

		// Import optional function endpoints
		if (pgpu->conditional_render_support)
		{
			_vkCmdBeginConditionalRenderingEXT = reinterpret_cast<PFN_vkCmdBeginConditionalRenderingEXT>(vkGetDeviceProcAddr(dev, "vkCmdBeginConditionalRenderingEXT"));
			_vkCmdEndConditionalRenderingEXT = reinterpret_cast<PFN_vkCmdEndConditionalRenderingEXT>(vkGetDeviceProcAddr(dev, "vkCmdEndConditionalRenderingEXT"));
		}

		if (pgpu->debug_utils_support)
		{
			_vkSetDebugUtilsObjectNameEXT = reinterpret_cast<PFN_vkSetDebugUtilsObjectNameEXT>(vkGetDeviceProcAddr(dev, "vkSetDebugUtilsObjectNameEXT"));
			_vkQueueInsertDebugUtilsLabelEXT = reinterpret_cast<PFN_vkQueueInsertDebugUtilsLabelEXT>(vkGetDeviceProcAddr(dev, "vkQueueInsertDebugUtilsLabelEXT"));
			_vkCmdInsertDebugUtilsLabelEXT = reinterpret_cast<PFN_vkCmdInsertDebugUtilsLabelEXT>(vkGetDeviceProcAddr(dev, "vkCmdInsertDebugUtilsLabelEXT"));
		}

		memory_map = vk::get_memory_mapping(pdev);
		m_formats_support = vk::get_optimal_tiling_supported_formats(pdev);
		m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev);

		if (pgpu->external_memory_host_support)
		{
			memory_map._vkGetMemoryHostPointerPropertiesEXT = reinterpret_cast<PFN_vkGetMemoryHostPointerPropertiesEXT>(vkGetDeviceProcAddr(dev, "vkGetMemoryHostPointerPropertiesEXT"));
		}

		// Allocator backend is selected by configuration
		if (g_cfg.video.disable_vulkan_mem_allocator)
			m_allocator = std::make_unique<vk::mem_allocator_vk>(dev, pdev);
		else
			m_allocator = std::make_unique<vk::mem_allocator_vma>(dev, pdev);
	}

	// Tears down the logical device. Unregisters this object from g_render_device when it
	// is the registered one, then destroys the allocator and the device if one was created.
	void render_device::destroy()
	{
		if (g_render_device == this)
		{
			g_render_device = nullptr;
		}

		// Nothing to release if the device was never created
		if (!dev || !pgpu)
		{
			return;
		}

		// The allocator is released before the device it was created with
		if (m_allocator)
		{
			m_allocator->destroy();
			m_allocator.reset();
		}

		vkDestroyDevice(dev, nullptr);
		dev = nullptr;

		// Drop cached per-device state
		memory_map = {};
		m_formats_support = {};
	}

	// Returns the properties of `format`, looking them up in the physical device's
	// format_properties cache first and querying the driver (then caching) on a miss.
	const VkFormatProperties render_device::get_format_properties(VkFormat format) const
	{
		auto& cache = pgpu->format_properties;

		if (const auto cached = cache.find(format); cached != cache.end())
		{
			return cached->second;
		}

		// Cache miss: create the entry and let the driver fill it in place
		VkFormatProperties& entry = cache[format];
		vkGetPhysicalDeviceFormatProperties(*pgpu, format, &entry);
		return entry;
	}

	// Finds the lowest memory type index that is allowed by `typeBits` and has every
	// property flag in `desired_mask`.
	//
	// typeBits:     bitmask of candidate memory type indices (bit i <=> memory type i)
	// desired_mask: property flags that must all be present
	// type_index:   optional output, receives the index of the match
	// Returns true when a match was found, false otherwise (type_index is left untouched).
	bool render_device::get_compatible_memory_type(u32 typeBits, u32 desired_mask, u32* type_index) const
	{
		const VkPhysicalDeviceMemoryProperties& mem_infos = pgpu->get_memory_properties();

		for (u32 index = 0; index < 32; ++index)
		{
			const bool is_candidate = ((typeBits >> index) & 1u) != 0;
			if (!is_candidate)
			{
				continue;
			}

			const bool has_all_flags = (mem_infos.memoryTypes[index].propertyFlags & desired_mask) == desired_mask;
			if (!has_all_flags)
			{
				continue;
			}

			if (type_index)
			{
				*type_index = index;
			}

			return true;
		}

		return false;
	}

	// Forwards to rebalance() on the device-local entry of memory_map.
	// NOTE(review): the rebalancing policy itself lives in that type and is not visible here.
	void render_device::rebalance_memory_type_usage()
	{
		// Rebalance device local memory types
		memory_map.device_local.rebalance();
	}

	// Shared Util
	// Builds the memory type mapping for a physical device: picks at most one memory type per
	// heap for each of the device-local, host-coherent (upload) and BAR categories, orders
	// them by priority and records the total heap size of each category.
	memory_type_mapping get_memory_mapping(const vk::physical_device& dev)
	{
		VkPhysicalDeviceMemoryProperties memory_properties;
		vkGetPhysicalDeviceMemoryProperties(static_cast<VkPhysicalDevice>(dev), &memory_properties);

		memory_type_mapping result;
		result.device_local_total_bytes = 0;
		result.host_visible_total_bytes = 0;
		result.device_bar_total_bytes = 0;

		// Sort the confusingly laid out heap-type map into something easier to scan.
		// Not performance-critical, this method is called once at initialization.
		struct type_record
		{
			u32 type_index;
			VkFlags flags;
			VkDeviceSize size;
		};

		struct heap_record
		{
			VkMemoryHeap heap;
			std::vector<type_record> types;
		};

		// One record per heap, in heap index order
		std::vector<heap_record> heaps(memory_properties.memoryHeapCount);
		for (u32 heap_index = 0; heap_index < memory_properties.memoryHeapCount; ++heap_index)
		{
			heaps[heap_index].heap = memory_properties.memoryHeaps[heap_index];
		}

		// Attach every memory type to the heap it allocates from
		for (u32 type_index = 0; type_index < memory_properties.memoryTypeCount; ++type_index)
		{
			const auto& info = memory_properties.memoryTypes[type_index];
			heaps[info.heapIndex].types.push_back({ type_index, info.propertyFlags, 0 });
		}

		// Collects, for each heap, the first type that has all `required` flags and none of the `forbidden` ones
		const auto collect_types = [&heaps](VkFlags required, VkFlags forbidden)
		{
			std::vector<type_record> matches;

			for (const auto& entry : heaps)
			{
				for (const auto& candidate : entry.types)
				{
					const bool has_required = (candidate.flags & required) == required;
					const bool has_forbidden = (candidate.flags & forbidden) != 0;

					if (has_required && !has_forbidden)
					{
						// Match, only once allowed per heap!
						matches.push_back({ candidate.type_index, candidate.flags, entry.heap.size });
						break;
					}
				}
			}

			return matches;
		};

		const VkFlags vram_flags     = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
		const VkFlags vram_excluded  = (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD);
		const VkFlags upload_flags   = (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
		const VkFlags cached_upload  = (upload_flags | VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
		const VkFlags bar_flags      = (upload_flags | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);

		auto device_local_types = collect_types(vram_flags, vram_excluded);
		auto host_coherent_types = collect_types(cached_upload, 0);
		auto bar_memory_types = collect_types(bar_flags, 0);

		if (host_coherent_types.empty())
		{
			rsx_log.warning("[Performance Warning] Could not identify a cached upload heap. Will fall back to uncached transport.");
			host_coherent_types = collect_types(upload_flags, 0);
		}

		ensure(!device_local_types.empty());
		ensure(!host_coherent_types.empty());

		// BAR heap, currently parked for future use, I have some plans for it (kd-11)
		for (const auto& bar_type : bar_memory_types)
		{
			result.device_bar.push(bar_type.type_index, bar_type.size);
			result.device_bar_total_bytes += bar_type.size;
		}

		// Generic VRAM access, requires some minor prioritization based on flags
		// Most devices have a 'PURE' device local type, pin that as the first priority
		// Internally, there will be some reshuffling based on memory load later, but this is rare
		if (device_local_types.size() > 1)
		{
			std::sort(device_local_types.begin(), device_local_types.end(), [](const auto& lhs, const auto& rhs)
			{
				if (lhs.flags == rhs.flags)
				{
					// Same kind of memory, bigger heap first
					return lhs.size > rhs.size;
				}

				if (lhs.flags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
				{
					return true;
				}

				if (rhs.flags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
				{
					return false;
				}

				return lhs.size > rhs.size;
			});
		}

		for (const auto& vram_type : device_local_types)
		{
			result.device_local.push(vram_type.type_index, vram_type.size);
			result.device_local_total_bytes += vram_type.size;
		}

		// Sort upload heap entries based on size.
		if (host_coherent_types.size() > 1)
		{
			std::sort(host_coherent_types.begin(), host_coherent_types.end(), [](const auto& lhs, const auto& rhs)
			{
				return lhs.size > rhs.size;
			});
		}

		for (const auto& upload_type : host_coherent_types)
		{
			result.host_visible_coherent.push(upload_type.type_index, upload_type.size);
			result.host_visible_total_bytes += upload_type.size;
		}

		rsx_log.notice("Detected %llu MB of device local memory", result.device_local_total_bytes / (0x100000));
		rsx_log.notice("Detected %llu MB of host coherent memory", result.host_visible_total_bytes / (0x100000));
		rsx_log.notice("Detected %llu MB of BAR memory", result.device_bar_total_bytes / (0x100000));

		return result;
	}

	// Probes the physical device for the depth and presentation formats the renderer cares
	// about and returns the results as a gpu_formats_support structure.
	gpu_formats_support get_optimal_tiling_supported_formats(const physical_device& dev)
	{
		// Returns true when `format` offers every bit of `wanted` for the selected tiling mode
		const auto format_supports = [&dev](VkFormat format, VkFlags wanted, VkBool32 use_linear_tiling) -> bool
		{
			VkFormatProperties format_props;
			vkGetPhysicalDeviceFormatProperties(dev, format, &format_props);

			VkFlags available = format_props.optimalTilingFeatures;
			if (use_linear_tiling)
			{
				available = format_props.linearTilingFeatures;
			}

			return (available & wanted) == wanted;
		};

		const VkFlags zbuffer_usage = (VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT);
		const VkFlags colorbuffer_usage = (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT);

		gpu_formats_support result = {};

		// Check supported depth formats
		result.d24_unorm_s8 = format_supports(VK_FORMAT_D24_UNORM_S8_UINT, zbuffer_usage, VK_FALSE);
		result.d32_sfloat_s8 = format_supports(VK_FORMAT_D32_SFLOAT_S8_UINT, zbuffer_usage, VK_FALSE);

		// Hide d24_s8 if force high precision z buffer is enabled
		if (g_cfg.video.force_high_precision_z_buffer && result.d32_sfloat_s8)
		{
			result.d24_unorm_s8 = false;
		}

		// Checks if linear BGRA8 images can be used for present
		result.bgra8_linear = format_supports(VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_FEATURE_BLIT_SRC_BIT, VK_TRUE);

		// Check if device supports RGBA8 format for rendering
		const bool rgba8_renderable = format_supports(VK_FORMAT_R8G8B8A8_UNORM, colorbuffer_usage, VK_FALSE);
		if (!rgba8_renderable)
		{
			// Non-fatal. Most games use BGRA layout due to legacy reasons as old GPUs typically supported BGRA and RGBA was emulated.
			rsx_log.error("Your GPU and/or driver does not support RGBA8 format. This can cause problems in some rare games that use this memory layout.");
		}

		// Check if linear RGBA8 images can be used for present
		result.argb8_linear = format_supports(VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_FEATURE_BLIT_SRC_BIT, VK_TRUE);

		return result;
	}

	// Computes the descriptor binding layout for a physical device: vertex textures are
	// placed right after the fragment texture slots, whose count is capped at 32.
	pipeline_binding_table get_pipeline_binding_table(const vk::physical_device& dev)
	{
		pipeline_binding_table table{};

		// Need to check how many samplers are supported by the driver
		const auto driver_limit = dev.get_limits().maxPerStageDescriptorSampledImages;
		const auto sampler_slots = std::min(driver_limit, 32u);

		table.vertex_textures_first_bind_slot = table.textures_first_bind_slot + sampler_slots;
		// 4 more bindings follow the first vertex texture slot
		table.total_descriptor_bindings = table.vertex_textures_first_bind_slot + 4;
		return table;
	}
}
